Used a variety of algorithms for image processing in the pre-training step
Simple convolutional neural network :
Transfer learning on a pre-trained ResNet50
# Show a 3x3 grid of randomly sampled lemon images from the dataset.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(5, 5))
fig.tight_layout()
for idx, ax in enumerate(axes.flat):
    # A fresh random sample for each of the nine panels.
    file_name = get_random_sample('Lemon')
    ax.set(xticks=[], yticks=[], title="Lemon {}".format(idx + 1))
    ax.imshow(Image.open(file_name))
Image we want network to classify :

It is not necessary to use the full dataset for training. To bring training time down, create a smaller dev dataset as shown below.
# Carve a smaller "dev" split out of the full dataset: 20% train, 20% test,
# with the remainder left unused. This keeps training time manageable.
train_ratio = 0.2
test_ratio = 0.2
train_length = int(len(all_data) * train_ratio)
test_length = int(len(all_data) * test_ratio)
# random_split requires the lengths to sum exactly to len(all_data), so the
# third (unused) split absorbs the rounding remainder from int() above.
lengths = [train_length, test_length, len(all_data) - train_length - test_length]
print("Size of train data = {}, \nSize of test data = {}".format(lengths[0], lengths[1]))
subsetA, subsetB, subsetC = random_split(all_data, lengths)
Size of train data = 11356, Size of test data = 11356
Following transforms are performed on images from fruits-360 dataset before we train them using a traditional neural network. We use torchvision library for this. This is optimized for pytorch.
- transforms.RandomHorizontalFlip()
- transforms.RandomVerticalFlip()
- transforms.Resize((100, 100)) — resize to 100x100 px
- transforms.Lambda(AddGaussianNoise) — for a subset of the training dataset, the background is replaced by random Gaussian noise; this helps the network identify user-provided images of fruits
- transforms.Normalize(pop_mean, pop_std)
$ \text{Probability} : \begin{cases} 20\% & \text{No Gaussian noise} \\ 80\% & \text{Add background Gaussian noise} \end{cases} $
# Side-by-side demo of the Gaussian-noise augmentation:
# left panel = original lemon, right panel = noise-augmented version.
to_pil_image = transforms.ToPILImage()
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(5, 5))
fig.tight_layout()
file_name = get_random_sample('Lemon')
for idx, ax in enumerate(axes.flat):
    ax.set(xticks=[], yticks=[], title="Lemon {}".format(idx + 1))
    if idx == 0:
        ax.imshow(Image.open(file_name))
    else:
        # AddGaussianNoise returns a tensor, so convert back to PIL to display.
        noisy = AddGaussianNoise(Image.open(file_name), True)
        ax.imshow(to_pil_image(noisy))
# Per-channel (RGB) mean and std of the fruit images, consumed by
# transforms.Normalize in the pipelines below.
pop_mean = []
pop_std = []
# This is calculated for the fruits, code below
# Hard-coded results of the (commented-out) full-dataset pass below, kept so
# the expensive computation does not need to be rerun on every session.
pop_mean =[0.6838789, 0.5691154, 0.48900163]
pop_std = [0.29941455, 0.35674098, 0.38645667]
# One-off computation that produced the values above: per-batch channel
# means/stds over the whole data_loader, then averaged across batches.
# for i, data in enumerate(data_loader):
# numpy_image = data[0].numpy()
# batch_mean = np.mean(numpy_image, axis=(0,2,3))
# batch_std = np.std(numpy_image, axis=(0,2,3))
# pop_mean.append(batch_mean)
# pop_std.append(batch_std)
# pop_mean = np.array(pop_mean).mean(axis=0)
# pop_std = np.array(pop_std).mean(axis=0)
# torchvision transform pipelines, keyed by dataset split.
image_transforms_CNN = {
# Training: augment (random flips + Gaussian-noise background), then normalize.
'train': transforms.Compose([
transforms.ToPILImage(),
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
# NOTE(review): there is no ToTensor() before Normalize here; this only works
# if AddGaussianNoise itself returns a tensor (its output needed ToPILImage
# in the demo above, which suggests it does) — confirm.
transforms.Lambda(AddGaussianNoise),
# Normalize images
transforms.Normalize(pop_mean, pop_std)
]),
# Test: no augmentation, just tensor conversion + normalization.
'test': transforms.Compose([
transforms.ToPILImage(),
transforms.ToTensor(),
# Normalize images
transforms.Normalize(pop_mean, pop_std)
]),
# Test at the network's 100x100 input size: nearest-neighbour resize for
# images that are not already 100x100.
'test_100': transforms.Compose([
transforms.ToPILImage(),
transforms.Resize((100, 100), interpolation = PIL.Image.NEAREST),
transforms.ToTensor(),
# Normalize images
transforms.Normalize(pop_mean, pop_std)
]),
}
# CUDA enables developers to speed up compute-intensive applications on the GPU.
# Check whether a CUDA-capable GPU is available for training.
torch.cuda.is_available()
True
# Type of Linux machine
!lsb_release -a
No LSB modules are available. Distributor ID: Ubuntu Description: Ubuntu 18.10 Release: 18.10 Codename: cosmic
# Type of GPU
!nvidia-smi
Tue Aug 6 21:11:09 2019
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 430.26 Driver Version: 430.26 CUDA Version: 10.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 GeForce RTX 2070 Off | 00000000:01:00.0 Off | N/A |
| 0% 37C P8 9W / 175W | 4677MiB / 7982MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 14076 C /usr/lib/anaconda3/bin/python 2079MiB |
| 0 31973 C /usr/lib/anaconda3/bin/python 2587MiB |
+-----------------------------------------------------------------------------+
Conv2d : Applies a 2D convolution over an input signal composed of several input planes.
BatchNorm2d : Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with additional channel dimension).
Linear : Applies a linear transformation to the incoming data: $y = xA^T + b$
MaxPool2d : Applies a 2D max pooling over an input signal composed of several input planes.
relu : Applies the rectified linear unit function element-wise.
def calculate_size_Conv2d(H, W, kernel_size, Padding=0, dilation=1, stride=1):
    """Return the output side length of a square Conv2d/MaxPool2d layer.

    Implements PyTorch's Conv2d size formula:
        floor((H + 2*Padding - dilation*(kernel_size - 1) - 1) / stride) + 1

    H, W        : input height/width (W is kept for interface compatibility;
                  inputs here are square, so only H is used)
    kernel_size : side of the square kernel
    Padding     : implicit zero padding added to both sides
    dilation    : spacing between kernel elements
    stride      : step of the sliding window
    """
    # Bug fix: the dilation term is dilation * (kernel_size - 1), not
    # (dilation * kernel_size - 1); the two coincide only when dilation == 1.
    # Floor division matches Conv2d, which truncates fractional sizes.
    return (H + 2 * Padding - dilation * (kernel_size - 1) - 1) // stride + 1
# Sanity-check the feature-map size after each convolution stage of the CNN.
# (a) self.conv_stage1 = nn.Conv2d(3, 16, kernel_size=5)
size_a = calculate_size_Conv2d(H=100, W=100, kernel_size=5)
print("(a) = {} ".format(size_a))
# (b) self.conv_stage1 = nn.Conv2d(16, 32, kernel_size=3)
size_b = calculate_size_Conv2d(H=46, W=46, kernel_size=3)
print("(b) = {} ".format(size_b))
# (c) self.conv_stage1 = nn.Conv2d(32, 64, kernel_size=3)
size_c = calculate_size_Conv2d(H=23, W=23, kernel_size=3)
print("(c) = {}".format(size_c))
(a) = 96.0 (b) = 44.0 (c) = 21.0
Above calculates image size after convolution operation.
class Net(nn.Module):
    """Simple 3-stage CNN classifier for 100x100 RGB fruit images.

    Each stage is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2); two
    fully-connected layers then map the flattened feature map to per-class
    log-probabilities (log_softmax output, suitable for F.nll_loss).
    """

    def __init__(self, num_classes):
        super().__init__()
        # (1) Stage 1
        # Input : channels=3 x width=100 x height=100
        # Output : channels=16 x width=96 x height=96 from (a)
        self.conv_stage1 = nn.Conv2d(3, 16, kernel_size=5)
        # (2) BatchNorm keeps shape: channels=16 x 96 x 96
        self.batch_norm_stage1 = nn.BatchNorm2d(num_features=16)
        # (3) MaxPool2d halves spatial size: 16 x 96 x 96 -> 16 x 48 x 48
        # (4) Stage 2
        # Input : channels=16 x width=48 x height=48
        # Output : channels=32 x width=46 x height=46 from (b)
        self.conv_stage2 = nn.Conv2d(16, 32, kernel_size=3)
        # (5) BatchNorm keeps shape: channels=32 x 46 x 46
        self.batch_norm_stage2 = nn.BatchNorm2d(num_features=32)
        # (6) MaxPool2d: 32 x 46 x 46 -> 32 x 23 x 23
        # (7) Stage 3
        # Input : channels=32 x width=23 x height=23
        # Output : channels=64 x width=21 x height=21 from (c)
        self.conv_stage3 = nn.Conv2d(32, 64, kernel_size=3)
        # (8) BatchNorm keeps shape: channels=64 x 21 x 21
        self.batch_norm_stage3 = nn.BatchNorm2d(num_features=64)
        # (9) MaxPool2d: 64 x 21 x 21 -> 64 x 10 x 10 (floor of 21/2)
        # Pooling layer common to all stages
        self.pool = nn.MaxPool2d(2, 2)
        self.linear_stage1 = nn.Linear(64 * 10 * 10, 250)
        self.linear_stage2 = nn.Linear(250, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.batch_norm_stage1(self.conv_stage1(x))))
        x = self.pool(F.relu(self.batch_norm_stage2(self.conv_stage2(x))))
        x = self.pool(F.relu(self.batch_norm_stage3(self.conv_stage3(x))))
        # From (9) we get 64 * 10 * 10 features per sample.
        x = x.view(-1, 64 * 10 * 10)
        # Bug fix: F.dropout defaults to training=True, so dropout was being
        # applied even after model.eval(); tie it to the module's mode.
        x = F.dropout(F.relu(self.linear_stage1(x)), p=0.4, training=self.training)
        x = self.linear_stage2(x)
        return F.log_softmax(x, dim=1)
# Instantiate the CNN, move it onto the GPU when one is available,
# and print a torchsummary overview for a 3x100x100 input.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net(len(class_names))
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
summary(model, (3, 100, 100))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 16, 96, 96] 1,216
BatchNorm2d-2 [-1, 16, 96, 96] 32
MaxPool2d-3 [-1, 16, 48, 48] 0
Conv2d-4 [-1, 32, 46, 46] 4,640
BatchNorm2d-5 [-1, 32, 46, 46] 64
MaxPool2d-6 [-1, 32, 23, 23] 0
Conv2d-7 [-1, 64, 21, 21] 18,496
BatchNorm2d-8 [-1, 64, 21, 21] 128
MaxPool2d-9 [-1, 64, 10, 10] 0
Linear-10 [-1, 250] 1,600,250
Linear-11 [-1, 111] 27,861
================================================================
Total params: 1,652,687
Trainable params: 1,652,687
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.11
Forward/backward pass size (MB): 4.18
Params size (MB): 6.30
Estimated Total Size (MB): 10.59
----------------------------------------------------------------
Training the network
`loss.backward()` : Does backpropagation
train_losses, test_losses = [], []


def train(epoch, model, train_loader, optimizer):
    """Run one training epoch of ``model`` over ``train_loader``.

    train_loader must yield (data, target, path) triples. Progress is printed
    every 200 batches; after the epoch, the final batch's NLL loss divided by
    the number of batches is appended to the global ``train_losses``.
    """
    model.train()
    # Derive the device from the model itself instead of relying on a
    # module-level ``device`` global; also drops the deprecated Variable wrapper.
    device = next(model.parameters()).device
    for batch_idx, (data, target, path) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 200 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data))
    # Bug fix: .item() detaches the value; appending the loss tensor itself
    # would keep the whole autograd graph alive (memory leak across epochs).
    train_losses.append(loss.item() / len(train_loader))
def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader``; print average loss and accuracy.

    test_loader must yield (data, target, path) triples. The dataset-averaged
    NLL loss is appended to the global ``test_losses``.
    """
    since = time.time()
    model.eval()
    # Derive the device from the model instead of a module-level global.
    device = next(model.parameters()).device
    test_loss = 0
    correct = 0
    # no_grad over the whole loop: no autograd bookkeeping during evaluation.
    with torch.no_grad():
        for data, target, path in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # sum up batch loss; reduction='sum' replaces the deprecated
            # size_average=False (which emitted a UserWarning), and .item()
            # accumulates a plain float instead of a tensor.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    time_elapsed = time.time() - since
    print('Test complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # Bug fix: previously only the LAST batch's loss was appended; record the
    # dataset-averaged loss so the plotted validation curve is meaningful.
    test_losses.append(test_loss)
# Reset the loss history, then train the CNN for 19 epochs, timing the run.
train_losses, test_losses = [], []
since = time.time()
for epoch in range(1, 20):
    train(epoch, model, train_loader, optimizer)
elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(elapsed // 60, elapsed % 60))
Train Epoch: 1 [0/11356 (0%)] Loss: 4.699692 Train Epoch: 2 [0/11356 (0%)] Loss: 2.703226 Train Epoch: 3 [0/11356 (0%)] Loss: 1.740270 Train Epoch: 4 [0/11356 (0%)] Loss: 1.055098 Train Epoch: 5 [0/11356 (0%)] Loss: 0.813848 Train Epoch: 6 [0/11356 (0%)] Loss: 0.461516 Train Epoch: 7 [0/11356 (0%)] Loss: 0.552204 Train Epoch: 8 [0/11356 (0%)] Loss: 0.515947 Train Epoch: 9 [0/11356 (0%)] Loss: 0.254227 Train Epoch: 10 [0/11356 (0%)] Loss: 0.386445 Train Epoch: 11 [0/11356 (0%)] Loss: 0.281965 Train Epoch: 12 [0/11356 (0%)] Loss: 0.217154 Train Epoch: 13 [0/11356 (0%)] Loss: 0.160044 Train Epoch: 14 [0/11356 (0%)] Loss: 0.149429 Train Epoch: 15 [0/11356 (0%)] Loss: 0.139316 Train Epoch: 16 [0/11356 (0%)] Loss: 0.160163 Train Epoch: 17 [0/11356 (0%)] Loss: 0.144762 Train Epoch: 18 [0/11356 (0%)] Loss: 0.114994 Train Epoch: 19 [0/11356 (0%)] Loss: 0.102330 Training complete in 5m 20s
# Switch to inference mode (BatchNorm uses running statistics) and evaluate.
model.eval()
test(model, test_loader)
/usr/lib/anaconda3/lib/python3.6/site-packages/torch/nn/_reduction.py:46: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
Test set: Average loss: 0.1282, Accuracy: 10925/11356 (96%) Test complete in 0m 12s
# Plot the recorded loss curves for the CNN run.
plt.plot(train_losses, label='Training loss')
plt.plot(test_losses, label='Validation loss')
plt.legend(frameon=False)
plt.show()
# Classify locally supplied images, normalizing with the dataset statistics.
predict_image(model, local_batch_loader, pop_mean, pop_std)
# Transfer learning: ImageNet-pretrained ResNet50 with a new classifier head.
model_resnet50 = models.resnet50(pretrained=True)
# Freeze parameters so we don't backprop through them
for param in model_resnet50.parameters():
    param.requires_grad = False
# Replace the final fully-connected layer with a small trainable head that
# ends in log-probabilities for our fruit classes.
model_resnet50.fc = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.BatchNorm1d(num_features=1024),
    nn.ReLU(),
    nn.Linear(1024, 512),
    nn.BatchNorm1d(num_features=512),
    nn.ReLU(),
    nn.Linear(512, len(class_names)),
    nn.LogSoftmax(dim=1)
)
# NOTE(review): criterion is defined but train() calls F.nll_loss directly —
# presumably kept for later use; confirm it is still needed.
criterion = nn.NLLLoss()
# Optimize only the new head; the frozen backbone has requires_grad=False.
optimizer = optim.Adam(model_resnet50.fc.parameters(), lr=0.003)
# .to() moves the module in place; the previous unused alias ``m`` is dropped.
model_resnet50.to(device)
# Reset the loss history, then fine-tune the ResNet50 head for 7 epochs on
# the 224x224 loader, timing the run.
train_losses, test_losses = [], []
since = time.time()
for epoch in range(1, 8):
    train(epoch, model_resnet50, train_loader_224, optimizer)
elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(elapsed // 60, elapsed % 60))
Train Epoch: 1 [0/22712 (0%)] Loss: 4.696129 Train Epoch: 1 [12800/22712 (56%)] Loss: 0.158681 Train Epoch: 2 [0/22712 (0%)] Loss: 0.033980 Train Epoch: 2 [12800/22712 (56%)] Loss: 0.040421 Train Epoch: 3 [0/22712 (0%)] Loss: 0.014049 Train Epoch: 3 [12800/22712 (56%)] Loss: 0.086649 Train Epoch: 4 [0/22712 (0%)] Loss: 0.037901 Train Epoch: 4 [12800/22712 (56%)] Loss: 0.046906 Train Epoch: 5 [0/22712 (0%)] Loss: 0.128555 Train Epoch: 5 [12800/22712 (56%)] Loss: 0.040066 Train Epoch: 6 [0/22712 (0%)] Loss: 0.075880 Train Epoch: 6 [12800/22712 (56%)] Loss: 0.016025 Train Epoch: 7 [0/22712 (0%)] Loss: 0.015707 Train Epoch: 7 [12800/22712 (56%)] Loss: 0.003590 Training complete in 9m 43s
# Test the model after training; eval() switches BatchNorm to running stats.
model_resnet50.eval()
test(model_resnet50, test_loader_224)
Test set: Average loss: 0.0149, Accuracy: 22594/22712 (99%) Test complete in 1m 53s
# Plot the recorded loss curves for the transfer-learning run.
plt.plot(train_losses, label='Training loss')
plt.plot(test_losses, label='Validation loss')
plt.legend(frameon=False)
plt.show()
# Classify local images; the pretrained ResNet50 expects ImageNet
# normalization statistics rather than the fruit-dataset ones.
predict_image(model_resnet50, local_batch_loader, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])